library(text)

# Locations of the labelled CSV splits.
train_path <- "/Users/yuwang/Desktop/PMethods/Binary/train_dev.csv"
test_path <- "/Users/yuwang/Desktop/PMethods/Binary/test.csv"

# Load each split into its own data frame.
train <- read.csv(file = train_path)
test <- read.csv(file = test_path)

# Load the serialized (RDS) embedding objects that accompany each split.
train_word_embeddings <- readRDS(
  file = "/Users/yuwang/Desktop/PMethods/Binary/train_word_embeddings.rds"
)
test_word_embeddings <- readRDS(
  file = "/Users/yuwang/Desktop/PMethods/Binary/test_word_embeddings.rds"
)

# Benchmark: for several training-set sizes, train a random-forest model on
# the first `i` training embeddings, time the training, predict on the test
# embeddings, and print accuracy / precision / recall / F1.

# Ratio that yields NA (instead of NaN/Inf) when the denominator is zero,
# e.g. precision when the model never predicts the positive class.
safe_ratio <- function(numerator, denominator) {
  if (denominator == 0) NA_real_ else numerator / denominator
}

train_embeddings <- train_word_embeddings$texts$text
# Largest training size that both the embeddings and the labels can supply.
n_available <- min(nrow(train_embeddings), length(train$label))

for (i in c(500, 1000, 5000)) {
  # Indexing past the end would silently pad `train$label` with NA, so skip
  # sizes the data cannot satisfy instead of training on padded input.
  if (i > n_available) {
    warning(
      "Skipping training size ", i, ": only ", n_available,
      " training rows are available.",
      call. = FALSE, immediate. = TRUE
    )
    next
  }
  train_rows <- seq_len(i)

  # Record start time
  start_time <- Sys.time()
  model <- textTrain(
    x = train_embeddings[train_rows, ], # predictors (the word embeddings)
    y = train$label[train_rows], # criterion variable (the label)
    force_train_method = "random_forest"
  )
  end_time <- Sys.time()
  # Fix the unit to seconds: a plain `end - start` picks secs/mins/hours
  # automatically, and cat() drops the unit, making runs incomparable.
  duration <- difftime(end_time, start_time, units = "secs")
  cat("Training size:", i, "\n")
  print(duration)
  cat("Execution duration for training:", as.numeric(duration), "secs", "\n")

  predictions <- textPredict(model, word_embeddings = test_word_embeddings$texts)

  predicted <- predictions$`text__cv_method="validation_split"pred`
  if (is.null(predicted)) {
    stop(
      "Prediction column not found in textPredict() output; available columns: ",
      paste(names(predictions), collapse = ", "),
      call. = FALSE
    )
  }

  # Give predictions and ground truth the same level set so the confusion
  # matrix is always 2 x 2 with aligned rows/columns, even when the model
  # predicts only one class. Actual-label levels come first, so the level
  # order matches what table() would have used for the `Actual` dimension.
  class_levels <- union(levels(factor(test$label)), levels(factor(predicted)))
  if (length(class_levels) != 2) {
    stop(
      "Expected exactly 2 classes across predictions and test labels, found ",
      length(class_levels), ": ", paste(class_levels, collapse = ", "),
      call. = FALSE
    )
  }

  # Creating a confusion matrix
  conf_matrix <- table(
    Predicted = factor(as.character(predicted), levels = class_levels),
    Actual = factor(as.character(test$label), levels = class_levels)
  )

  # Extracting True Positives, False Positives, True Negatives, and False
  # Negatives; the second level is treated as the positive class.
  TP <- conf_matrix[2, 2]
  TN <- conf_matrix[1, 1]
  FP <- conf_matrix[2, 1]
  FN <- conf_matrix[1, 2]

  # Calculating Accuracy
  accuracy <- safe_ratio(TP + TN, sum(conf_matrix))

  # Calculating Precision
  precision <- safe_ratio(TP, TP + FP)

  # Calculating Recall
  recall <- safe_ratio(TP, TP + FN)

  # Calculating F1 from counts; algebraically equal to
  # 2 * precision * recall / (precision + recall), but gives 0 rather than
  # NaN when TP is 0 while errors exist.
  f1 <- safe_ratio(2 * TP, 2 * TP + FP + FN)

  # Printing the results
  cat("Accuracy:", accuracy, "\n")
  cat("Precision:", precision, "\n")
  cat("Recall:", recall, "\n")
  cat("F1:", f1, "\n")
}